#!/bin/bash
# Launch the LLM-agent Streamlit app with the selected model/runtime config.
# Edit the variables below to change the model, device, or dtype; no CLI args.
set -euo pipefail

# --- Model selection: uncomment exactly one ---
# MODEL_PATH='/home/wangxianda/model_zoo/llama_3.2_3b_instruct'
# MODEL_PATH='/home/wangxianda/model_zoo/llama3_8b_instruct'
MODEL_PATH='/home/wangxianda/model_zoo/Qwen2.5-14B-Instruct'

DEVICE='cuda'      # 'cuda' | 'cpu'
DTYPE='float16'    # 'float16' | 'float32' (bare names, not 'torch.float16')

# Not currently forwarded — only used if the app_args line below is uncommented.
# shellcheck disable=SC2034
CONTEXT=20         # presumably context length for the agent — TODO confirm units
# shellcheck disable=SC2034
MAX_TOKENS=500     # presumably max generated tokens per reply — TODO confirm

readonly MODEL_PATH DEVICE DTYPE CONTEXT MAX_TOKENS

# Everything after the bare `--` is passed to main.py, not to streamlit itself.
app_args=(--model "$MODEL_PATH" --device "$DEVICE" --dtype "$DTYPE")
# app_args+=(--context_len "$CONTEXT" --max_tokens "$MAX_TOKENS")

streamlit run ./llm_agent/main.py \
  --server.address 127.0.0.1 \
  --server.port 6037 \
  --server.headless True \
  -- "${app_args[@]}"